{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Deep Reinforcement Learning for Optimal Execution of Portfolio Transactions     "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import utils\n",
    "\n",
    "# Get the default financial and AC Model parameters\n",
    "financial_params, ac_params = utils.get_env_param()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>Financial Parameters</caption>\n",
       "<tr>\n",
       "  <th>Annual Volatility:</th>  <td>12%</td> <th>  Bid-Ask Spread:    </th>     <td>0.125</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Daily Volatility:</th>  <td>0.8%</td> <th>  Daily Trading Volume:</th> <td>5,000,000</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.table.SimpleTable'>"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "financial_params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>Almgren and Chriss Model Parameters</caption>\n",
       "<tr>\n",
       "  <th>Total Number of Shares for Agent1 to Sell:</th>        <td>500,000</td> <th>  Fixed Cost of Selling per Share:</th>    <td>$0.062</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Total Number of Shares for Agent2 to Sell:</th>        <td>500,000</td> <th>  Trader's Risk Aversion for Agent 1:</th>  <td>1e-06</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Starting Price per Share:</th>                         <td>$50.00</td>  <th>  Trader's Risk Aversion for Agent 2:</th> <td>0.0001</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Price Impact for Each 1% of Daily Volume Traded:</th> <td>$2.5e-06</td> <th>  Permanent Impact Constant:</th>          <td>2.5e-07</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Number of Days to Sell All the Shares:</th>              <td>60</td>    <th>  Single Step Variance:</th>                <td>0.144</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Number of Trades:</th>                                   <td>60</td>    <th>  Time Interval between trades:</th>         <td>1.0</td>  \n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.table.SimpleTable'>"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ac_params"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Episode [100/1300]\tAverage Shortfall for Agent1: $1,172,575.47\n",
      "Episode [100/1300]\tAverage Shortfall for Agent2: $1,183,832.73\n",
      "Episode [200/1300]\tAverage Shortfall for Agent1: $1,281,148.59\n",
      "Episode [200/1300]\tAverage Shortfall for Agent2: $1,281,025.43\n",
      "Episode [300/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n",
      "Episode [300/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n",
      "Episode [400/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n",
      "Episode [400/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n",
      "Episode [500/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n",
      "Episode [500/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n",
      "Episode [600/1300]\tAverage Shortfall for Agent1: $1,281,250.00\n",
      "Episode [600/1300]\tAverage Shortfall for Agent2: $1,281,250.00\n",
      "Episode [700/1300]\tAverage Shortfall for Agent1: $1,227,339.91\n",
      "Episode [700/1300]\tAverage Shortfall for Agent2: $1,253,734.24\n",
      "Episode [800/1300]\tAverage Shortfall for Agent1: $415,623.02\n",
      "Episode [800/1300]\tAverage Shortfall for Agent2: $433,944.85\n",
      "Episode [900/1300]\tAverage Shortfall for Agent1: $314,968.49\n",
      "Episode [900/1300]\tAverage Shortfall for Agent2: $317,854.76\n",
      "Episode [1000/1300]\tAverage Shortfall for Agent1: $318,731.56\n",
      "Episode [1000/1300]\tAverage Shortfall for Agent2: $317,495.71\n",
      "Episode [1100/1300]\tAverage Shortfall for Agent1: $329,135.85\n",
      "Episode [1100/1300]\tAverage Shortfall for Agent2: $333,255.71\n",
      "Episode [1200/1300]\tAverage Shortfall for Agent1: $300,993.44\n",
      "Episode [1200/1300]\tAverage Shortfall for Agent2: $301,320.57\n",
      "Episode [1300/1300]\tAverage Shortfall for Agent1: $294,413.69\n",
      "Episode [1300/1300]\tAverage Shortfall for Agent2: $292,937.04\n",
      "\n",
      "Average Implementation Shortfall for Agent1: $829,225.39 \n",
      "\n",
      "\n",
      "Average Implementation Shortfall for Agent2: $833,877.00 \n",
      "\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "import syntheticChrissAlmgren as sca\n",
    "from ddpg_agent import Agent\n",
    "\n",
    "from collections import deque\n",
    "\n",
    "# Create simulation environment\n",
    "env = sca.MarketEnvironment()\n",
    "\n",
    "# Initialize Feed-forward DNNs for Actor and Critic models. \n",
    "agent1 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 1225)\n",
    "agent2 = Agent(state_size=env.observation_space_dimension(), action_size=env.action_space_dimension(),random_seed = 108)\n",
    "# Set the liquidation time\n",
    "lqt = 60\n",
    "\n",
    "# Set the number of trades\n",
    "n_trades = 60\n",
    "\n",
    "# Set trader's risk aversion\n",
    "tr1 = 1e-6\n",
    "tr2 = 1e-6\n",
    "\n",
    "# Set the number of episodes to run the simulation\n",
    "episodes = 1300\n",
    "shortfall_list = []\n",
    "shortfall_hist1 = np.array([])\n",
    "shortfall_hist2 = np.array([])\n",
    "shortfall_deque1 = deque(maxlen=100)\n",
    "shortfall_deque2 = deque(maxlen=100)\n",
    "for episode in range(episodes): \n",
    "    # Reset the enviroment\n",
    "    cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)\n",
    "\n",
    "    # set the environment to make transactions\n",
    "    env.start_transactions()\n",
    "\n",
    "    for i in range(n_trades + 1):\n",
    "      \n",
    "        # Predict the best action for the current state. \n",
    "        cur_state1 = np.delete(cur_state,8)\n",
    "        cur_state2 = np.delete(cur_state,7)\n",
    "        #print(cur_state[5:])\n",
    "        action1 = agent1.act(cur_state1, add_noise = True)\n",
    "        action2 = agent2.act(cur_state2, add_noise = True)\n",
    "        #print(action1,action2)\n",
    "        # Action is performed and new state, reward, info are received. \n",
    "        new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)\n",
    "        \n",
    "        # current state, action, reward, new state are stored in the experience replay\n",
    "        new_state1 = np.delete(new_state,8)\n",
    "        new_state2 = np.delete(new_state,7)\n",
    "        agent1.step(cur_state1, action1, reward1, new_state1, done1)\n",
    "        agent2.step(cur_state2, action2, reward2, new_state2, done2)\n",
    "        # roll over new state\n",
    "        cur_state = new_state\n",
    "\n",
    "        if info.done1 and info.done2:\n",
    "            shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)\n",
    "            shortfall_deque1.append(info.implementation_shortfall1)\n",
    "            \n",
    "            shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)\n",
    "            shortfall_deque2.append(info.implementation_shortfall2)\n",
    "            break\n",
    "        \n",
    "    if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes\n",
    "        print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        \n",
    "        print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))\n",
    "        shortfall_list.append([np.mean(shortfall_deque1),np.mean(shortfall_deque2)])\n",
    "print('\\nAverage Implementation Shortfall for Agent1: ${:,.2f} \\n'.format(np.mean(shortfall_hist1)))\n",
    "print('\\nAverage Implementation Shortfall for Agent2: ${:,.2f} \\n'.format(np.mean(shortfall_hist2)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "shortfall = np.array(shortfall_list)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.save('1e-6_1e-6_cooporation_shorfall_list.npy',shortfall)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "1e-06 1e-06\n",
      "[1. 1.]\n",
      "[0.761694 0.656324]\n",
      "[0.603648 0.454928]\n",
      "[0.44365  0.334226]\n",
      "[0.305346 0.25539 ]\n",
      "[0.20247  0.202642]\n",
      "[0.13316  0.148788]\n",
      "[0.09197 0.10399]\n",
      "[0.064072 0.074902]\n",
      "[0.044238 0.052522]\n",
      "[0.03257  0.036602]\n",
      "[0.02397  0.024466]\n",
      "[0.018556 0.01732 ]\n",
      "[0.013314 0.011942]\n",
      "[0.009696 0.008204]\n",
      "[0.006774 0.005622]\n",
      "[0.004728 0.003898]\n",
      "[0.003236 0.002704]\n",
      "[0.00228  0.001762]\n",
      "[0.00165 0.00114]\n",
      "[0.001234 0.000778]\n",
      "[0.000932 0.000566]\n",
      "[0.000674 0.000392]\n",
      "[0.000506 0.00029 ]\n",
      "[0.000376 0.000212]\n",
      "[0.000294 0.00015 ]\n",
      "[0.000224 0.000108]\n",
      "[1.66e-04 7.40e-05]\n",
      "[1.14e-04 5.40e-05]\n",
      "[8.2e-05 4.2e-05]\n",
      "[5.8e-05 3.4e-05]\n",
      "[4.0e-05 2.8e-05]\n",
      "[2.6e-05 2.2e-05]\n",
      "[1.8e-05 1.8e-05]\n",
      "[1.2e-05 1.4e-05]\n",
      "[8.e-06 1.e-05]\n",
      "[6.e-06 8.e-06]\n",
      "[4.e-06 6.e-06]\n",
      "[2.e-06 4.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "[2.e-06 2.e-06]\n",
      "Episode [1300/1300]\tAverage Shortfall for Agent1: $298,868.72\n",
      "Episode [1300/1300]\tAverage Shortfall for Agent2: $296,739.36\n"
     ]
    }
   ],
   "source": [
    "print(tr1,tr2)\n",
    "cur_state = env.reset(seed = episode, liquid_time = lqt, num_trades = n_trades, lamb1 = tr1,lamb2 = tr2)\n",
    "\n",
    "    # set the environment to make transactions\n",
    "env.start_transactions()\n",
    "\n",
    "trajectory = np.zeros([n_trades+1,2])\n",
    "for i in range(n_trades + 1):\n",
    "    trajectory[i] = cur_state[7:]\n",
    "    \n",
    "    print(cur_state[7:])\n",
    "        # Predict the best action for the current state. \n",
    "    cur_state1 = np.delete(cur_state,8)\n",
    "    cur_state2 = np.delete(cur_state,7)\n",
    "        #print(cur_state[5:])\n",
    "    action1 = agent1.act(cur_state1, add_noise = True)\n",
    "    action2 = agent2.act(cur_state2, add_noise = True)\n",
    "        #print(action1,action2)\n",
    "        # Action is performed and new state, reward, info are received. \n",
    "    new_state, reward1, reward2, done1, done2, info = env.step(action1,action2)\n",
    "        \n",
    "        # current state, action, reward, new state are stored in the experience replay\n",
    "    new_state1 = np.delete(new_state,8)\n",
    "    new_state2 = np.delete(new_state,7)\n",
    "    agent1.step(cur_state1, action1, reward1, new_state1, done1)\n",
    "    agent2.step(cur_state2, action2, reward2, new_state2, done2)\n",
    "        # roll over new state\n",
    "    cur_state = new_state\n",
    "\n",
    "    if info.done1 and info.done2:\n",
    "        shortfall_hist1 = np.append(shortfall_hist1, info.implementation_shortfall1)\n",
    "        shortfall_deque1.append(info.implementation_shortfall1)\n",
    "            \n",
    "        shortfall_hist2 = np.append(shortfall_hist2, info.implementation_shortfall2)\n",
    "        shortfall_deque2.append(info.implementation_shortfall2)\n",
    "        break\n",
    "        \n",
    "if (episode + 1) % 100 == 0: # print average shortfall over last 100 episodes\n",
    "    print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent1: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque1)))        \n",
    "    print('\\rEpisode [{}/{}]\\tAverage Shortfall for Agent2: ${:,.2f}'.format(episode + 1, episodes, np.mean(shortfall_deque2)))\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "np.save('1e-6_1e-6_competition_trajectory_1500.npy',trajectory)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "ename": "TypeError",
     "evalue": "reset() got an unexpected keyword argument 'lamb'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-5-407db19f3a21>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m     20\u001b[0m \u001b[0;31m# Plot the trading list and trading trajectory. If show_trl = True, the data frame containing the values of the\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m     21\u001b[0m \u001b[0;31m# trading list and trading trajectory is printed\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 22\u001b[0;31m \u001b[0mutils\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplot_trade_list\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlq_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0ml_time\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnm_trades\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mn_trades\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtr_risk\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mt_risk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mshow_trl\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/Kaggle/finance/utils.py\u001b[0m in \u001b[0;36mplot_trade_list\u001b[0;34m(lq_time, nm_trades, tr_risk, show_trl)\u001b[0m\n\u001b[1;32m    313\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    314\u001b[0m     \u001b[0;31m# Reset the environment with the given parameters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 315\u001b[0;31m     \u001b[0menv\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mliquid_time\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlq_time\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_trades\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnm_trades\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlamb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtr_risk\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    316\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    317\u001b[0m     \u001b[0;31m# Get the trading list from the environment\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mTypeError\u001b[0m: reset() got an unexpected keyword argument 'lamb'"
     ]
    }
   ],
   "source": [
    "%matplotlib inline\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "import utils\n",
    "\n",
    "# We set the default figure size\n",
    "plt.rcParams['figure.figsize'] = [17.0, 7.0]\n",
    "\n",
    "\n",
    "# Set the number of days to sell all shares (i.e. the liquidation time)\n",
    "l_time = 60\n",
    "\n",
    "# Set the number of trades\n",
    "n_trades = 60\n",
    "\n",
    "# Set the trader's risk aversion\n",
    "t_risk = 1e-6\n",
    "\n",
    "# Plot the trading list and trading trajectory. If show_trl = True, the data frame containing the values of the\n",
    "# trading list and trading trajectory is printed\n",
    "utils.plot_trade_list(lq_time = l_time, nm_trades = n_trades, tr_risk = t_risk, show_trl = True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
